4 Genomic data
4.1 Read duplicates
all_data %>%
select(dataset,Extraction,duplicates,Taxon) %>%
unique() %>%
group_by(Taxon,Extraction) %>%
summarise(value = sprintf("%.1f±%.1f", mean(duplicates), sd(duplicates))) %>%
pivot_wider(names_from = Extraction, values_from = value) %>%
tt(caption = "Mean and standard deviation of fraction of duplicated reads")| Taxon | ZYMO | DREX | EHEX |
|---|---|---|---|
| Amphibian | 0.3±0.2 | 0.2±0.2 | 0.2±0.2 |
| Reptile | 0.5±0.4 | 0.3±0.3 | 0.4±0.4 |
| Mammal | 0.4±0.4 | 0.2±0.1 | 0.2±0.2 |
| Bird | 0.8±0.3 | 0.9±0.1 | 0.6±0.4 |
| Control | 1.0±0.0 | 0.9±0.0 | 1.0±0.0 |
all_data %>%
select(dataset,Extraction,duplicates,Taxon, Species) %>%
mutate(duplicates=duplicates*100) %>%
unique() %>%
ggplot(aes(x=Extraction,y=duplicates, color=Species, group=Extraction)) +
scale_y_reverse() +
geom_boxplot(outlier.shape = NA, fill="#f4f4f4", color="#8c8c8c") +
geom_jitter() +
scale_color_manual(values=vertebrate_colors) +
facet_grid(. ~ Taxon, scales = "free") +
theme_minimal() +
labs(y="Duplication rate (%)",x="Extraction method")all_data %>%
select(dataset,Sample,Species,Extraction,duplicates,Taxon) %>%
filter(Taxon != "Control") %>%
lmerTest::lmer(duplicates ~ Extraction + (1 | Sample) + (1 | Species), data = ., REML = FALSE) %>%
broom.mixed::tidy() %>%
tt()| effect | group | term | estimate | std.error | statistic | df | p.value |
|---|---|---|---|---|---|---|---|
| fixed | NA | (Intercept) | 0.48640693 | 0.07257915 | 6.701744 | 14.33359 | 8.937456e-06 |
| fixed | NA | ExtractionDREX | -0.09279626 | 0.03900562 | -2.379048 | 160.72377 | 1.853073e-02 |
| fixed | NA | ExtractionEHEX | -0.14618630 | 0.03918499 | -3.730671 | 160.73104 | 2.644423e-04 |
| ran_pars | Sample | sd__(Intercept) | 0.00000000 | NA | NA | NA | NA |
| ran_pars | Species | sd__(Intercept) | 0.23148121 | NA | NA | NA | NA |
| ran_pars | Residual | sd__Observation | 0.21005171 | NA | NA | NA | NA |
4.2 Depth of coverage
all_data %>%
select(dataset,Extraction,coverage_depth,Taxon) %>%
unique() %>%
group_by(Taxon,Extraction) %>%
summarise(value = sprintf("%.1f±%.1f", mean(coverage_depth), sd(coverage_depth))) %>%
pivot_wider(names_from = Extraction, values_from = value) %>%
tt(caption = "Mean and standard deviation of fraction of duplicated reads")| Taxon | ZYMO | DREX | EHEX |
|---|---|---|---|
| Amphibian | 0.0±0.0 | 0.0±0.0 | 0.0±0.0 |
| Reptile | 0.2±0.3 | 0.1±0.1 | 0.1±0.1 |
| Mammal | 0.7±1.2 | 0.3±0.4 | 0.5±1.0 |
| Bird | 0.4±0.8 | 0.6±0.5 | 0.8±0.8 |
| Control | 0.0±0.0 | 0.0±0.0 | 0.0±0.0 |
all_data %>%
select(dataset,Extraction,coverage_depth,Taxon, Species) %>%
unique() %>%
ggplot(aes(x=Extraction,y=coverage_depth, color=Species, group=Extraction)) +
geom_boxplot(outlier.shape = NA, fill="#f4f4f4", color="#8c8c8c") +
geom_jitter() +
scale_color_manual(values=vertebrate_colors) +
facet_grid(. ~ Taxon, scales = "free") +
theme_minimal() +
labs(y="Depth of coverage",x="Extraction method")all_data %>%
select(dataset,Sample,Species,Extraction,coverage_depth,Taxon) %>%
unique() %>%
filter(Taxon != "Control") %>%
lmerTest::lmer(coverage_depth ~ Extraction + (1 | Sample) + (1 | Species), data = ., REML = FALSE) %>%
broom.mixed::tidy() %>%
tt()| effect | group | term | estimate | std.error | statistic | df | p.value |
|---|---|---|---|---|---|---|---|
| fixed | NA | (Intercept) | 0.329125000 | 0.1331063 | 2.47264850 | 20.58494 | 0.02222872 |
| fixed | NA | ExtractionDREX | -0.089791667 | 0.1144640 | -0.78445305 | 48.00000 | 0.43662873 |
| fixed | NA | ExtractionEHEX | -0.002791667 | 0.1144640 | -0.02438903 | 48.00000 | 0.98064341 |
| ran_pars | Sample | sd__(Intercept) | 0.408634577 | NA | NA | NA | NA |
| ran_pars | Species | sd__(Intercept) | 0.224731208 | NA | NA | NA | NA |
| ran_pars | Residual | sd__Observation | 0.396515070 | NA | NA | NA | NA |
4.3 Breadth of coverage
all_data %>%
select(dataset,Extraction,coverage_breadth,Taxon) %>%
unique() %>%
group_by(Taxon,Extraction) %>%
summarise(value = sprintf("%.1f±%.1f", mean(coverage_breadth), sd(coverage_breadth))) %>%
pivot_wider(names_from = Extraction, values_from = value) %>%
tt(caption = "Mean and standard deviation of depth of host genome coverage")| Taxon | ZYMO | DREX | EHEX |
|---|---|---|---|
| Amphibian | 0.0±0.0 | 0.0±0.0 | 0.0±0.0 |
| Reptile | 4.9±7.5 | 3.0±5.4 | 2.9±3.7 |
| Mammal | 5.7±5.9 | 10.2±16.4 | 15.1±26.4 |
| Bird | 0.6±0.5 | 3.2±4.4 | 8.9±13.9 |
| Control | 0.0±0.0 | 0.0±0.0 | 0.0±0.0 |
all_data %>%
select(dataset,Extraction,coverage_breadth,Taxon,Species) %>%
unique() %>%
ggplot(aes(x=Extraction,y=coverage_breadth, color=Species, group=Extraction)) +
geom_boxplot(outlier.shape = NA, fill="#f4f4f4", color="#8c8c8c") +
geom_jitter() +
scale_color_manual(values=vertebrate_colors) +
facet_grid(. ~ Taxon, scales = "free") +
theme_minimal() +
labs(y="Breadth of coverage (%)",x="Extraction method")all_data %>%
select(dataset,Extraction,Sample,Species,coverage_breadth,Taxon) %>%
unique() %>%
filter(Taxon != "Control") %>%
lmerTest::lmer(coverage_breadth ~ Extraction + (1 | Sample) + (1 | Species), data = ., REML = FALSE) %>%
broom.mixed::tidy() %>%
tt()| effect | group | term | estimate | std.error | statistic | df | p.value |
|---|---|---|---|---|---|---|---|
| fixed | NA | (Intercept) | 2.799167 | 2.252826 | 1.2425133 | 20.82558 | 0.22785670 |
| fixed | NA | ExtractionDREX | 1.301250 | 1.956426 | 0.6651158 | 48.00000 | 0.50915975 |
| fixed | NA | ExtractionEHEX | 3.918750 | 1.956426 | 2.0030144 | 48.00000 | 0.05084021 |
| ran_pars | Sample | sd__(Intercept) | 7.118462 | NA | NA | NA | NA |
| ran_pars | Species | sd__(Intercept) | 3.549767 | NA | NA | NA | NA |
| ran_pars | Residual | sd__Observation | 6.777259 | NA | NA | NA | NA |
4.4 Breadth vs. coverage
all_data_log <- all_data %>%
mutate(coverage_breadth_log=log(coverage_breadth)) %>%
mutate(coverage_depth_log=log(coverage_depth))
lm_eq <- lm(coverage_breadth_log ~ coverage_depth_log, data = all_data_log %>% filter(coverage_depth_log != -Inf ,coverage_breadth_log != -Inf))
coef <- coef(lm_eq)
all_data_log$coverage_breadth_log_pred <- coef[1] + coef[2] * all_data_log$coverage_depth_log
all_data_log %>%
select(dataset,Extraction,coverage_depth_log,coverage_breadth_log,coverage_breadth_log_pred,Taxon,Species,Sample) %>%
unique() %>%
ggplot(aes(x=coverage_depth_log,y=coverage_breadth_log)) +
geom_point(aes(color=Species, shape=Extraction),size=3, alpha=0.9) +
geom_segment(aes(x = coverage_depth_log, y = coverage_breadth_log, xend = coverage_depth_log, yend = coverage_breadth_log_pred, color=Species), alpha=0.9)+
geom_smooth(method = lm, se = FALSE, color="#666666") +
scale_color_manual(values=vertebrate_colors) +
theme_minimal() +
labs(y="Breadth of coverage (%)",x="Depth of coverage")